energy <- read_csv(here("data", "energy.csv"))
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## month = col_character(),
## res_total = col_double(),
## ind_total = col_double()
## )
energy_ts <- energy %>%
mutate(date = tsibble::yearmonth(month)) %>%
as_tsibble(key = NULL, index = date)
ggplot(data = energy_ts, aes(x = date, y = res_total)) +
geom_line() +
labs(y = "Residential energy consumption \n (Trillion BTU)")
energy_ts %>%
gg_season(y = res_total) +
theme_minimal() +
labs(x = "month",
y = "residential energy consumption (trillion BTU)")
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
## Warning in NextMethod("["): Incompatible methods (">=.Date", ">=.vctrs_vctr")
## for ">="
## Warning in NextMethod("["): Incompatible methods ("<=.Date", "<=.vctrs_vctr")
## for "<="
energy_ts %>% gg_subseries(res_total)
# Find STL decomposition
dcmp <- energy_ts %>%
model(STL(res_total ~ season()))
# View the components
components (dcmp)
## # A dable: 538 x 7 [1M]
## # Key: .model [1]
## # STL Decomposition: res_total = trend + season_year + remainder
## .model date res_total trend season_year remainder season_adjust
## <chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 STL(res_total ~… 1973 Jan 1932. 1257. 684. -9.21 1248.
## 2 STL(res_total ~… 1973 Feb 1687. 1253. 465. -31.3 1222.
## 3 STL(res_total ~… 1973 Mar 1497. 1250. 242. 5.67 1255.
## 4 STL(res_total ~… 1973 Apr 1178. 1246. -62.9 -5.49 1241.
## 5 STL(res_total ~… 1973 May 1015. 1242. -256. 28.7 1271.
## 6 STL(res_total ~… 1973 Jun 934. 1238. -313. 8.89 1247.
## 7 STL(res_total ~… 1973 Jul 981. 1234. -231. -22.0 1212.
## 8 STL(res_total ~… 1973 Aug 1019. 1231. -225. 13.7 1244.
## 9 STL(res_total ~… 1973 Sep 957. 1227. -314. 43.5 1271.
## 10 STL(res_total ~… 1973 Oct 992. 1224. -271. 39.4 1263.
## # … with 528 more rows
# Visualize the decomposed components
components(dcmp) %>% autoplot() +
theme_minimal()
energy_ts %>%
ACF(res_total) %>%
autoplot()
# Create the model:
energy_fit <- energy_ts %>%
model(
ets = ETS(res_total ~ season("M"))
)
# Forecast using the model 10 years into the future:
energy_forecast <- energy_fit %>%
forecast(h = "10 years")
# Plot just the forecasted values (with 80% and 95% CIs):
energy_forecast %>%
autoplot()
# Or plot it added to the original data:
energy_forecast %>%
autoplot(energy_ts)
# Append the predicted values (and residuals) to original energy data
energy_predicted <- broom::augment(energy_fit)
# Use View(energy_predicted) to see the resulting data frame
# Plot the actual energy values (res_total), and the predicted values (stored as .fitted) atop them
ggplot(data = energy_predicted) +
geom_line(aes(x = date, y = res_total)) +
geom_line(aes(x = date, y = .fitted), color = "red")
Residuals should be uncorrelated, centered at 0, and ideally normally distributed.
ggplot(data = energy_predicted,
aes(x = .resid)) +
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
A. California county outlines (polygons)
# Read in the CA county shapefile
ca_counties <- read_sf(here("data","ca_counties","CA_Counties_TIGER2016.shp"))
ca_subset <- ca_counties %>%
select(NAME, ALAND) %>%
rename(county_name = NAME, land_area = ALAND)
# Check and set the CRS
ca_subset %>% st_crs()
## Coordinate Reference System:
## User input: WGS 84 / Pseudo-Mercator
## wkt:
## PROJCRS["WGS 84 / Pseudo-Mercator",
## BASEGEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]],
## CONVERSION["Popular Visualisation Pseudo-Mercator",
## METHOD["Popular Visualisation Pseudo Mercator",
## ID["EPSG",1024]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["False easting",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["World - 85°S to 85°N"],
## BBOX[-85.06,-180,85.06,180]],
## ID["EPSG",3857]]
# Plot the California counties using geom_sf()
ggplot(data = ca_subset) +
geom_sf(aes(fill = land_area),
color = "white",
size = 0.1) +
theme_void() +
scale_fill_gradientn(colors = c("cyan", "blue", "purple"))
B. Invasive red sesbania records (spatial points)
# Read in the data
sesbania <- read_sf(here("data", "red_sesbania", "ds80.shp"))
# Check the CRS:
sesbania %>% st_crs()
## Coordinate Reference System:
## User input: Custom
## wkt:
## PROJCRS["Custom",
## BASEGEOGCRS["NAD83",
## DATUM["North American Datum 1983",
## ELLIPSOID["GRS 1980",6378137,298.257222101,
## LENGTHUNIT["metre",1]],
## ID["EPSG",6269]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["Degree",0.0174532925199433]]],
## CONVERSION["unnamed",
## METHOD["Albers Equal Area",
## ID["EPSG",9822]],
## PARAMETER["Latitude of false origin",0,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8821]],
## PARAMETER["Longitude of false origin",-120,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8822]],
## PARAMETER["Latitude of 1st standard parallel",34,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8823]],
## PARAMETER["Latitude of 2nd standard parallel",40.5,
## ANGLEUNIT["Degree",0.0174532925199433],
## ID["EPSG",8824]],
## PARAMETER["Easting at false origin",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8826]],
## PARAMETER["Northing at false origin",-4000000,
## LENGTHUNIT["metre",1],
## ID["EPSG",8827]]],
## CS[Cartesian,2],
## AXIS["(E)",east,
## ORDER[1],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]],
## AXIS["(N)",north,
## ORDER[2],
## LENGTHUNIT["metre",1,
## ID["EPSG",9001]]]]
# Update the CRS so it matches the CA counties CRS using st_transform()
sesbania <- st_transform(sesbania, 3857)
# Then check it:
sesbania %>% st_crs()
## Coordinate Reference System:
## User input: EPSG:3857
## wkt:
## PROJCRS["WGS 84 / Pseudo-Mercator",
## BASEGEOGCRS["WGS 84",
## DATUM["World Geodetic System 1984",
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## ID["EPSG",4326]],
## CONVERSION["Popular Visualisation Pseudo-Mercator",
## METHOD["Popular Visualisation Pseudo Mercator",
## ID["EPSG",1024]],
## PARAMETER["Latitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8801]],
## PARAMETER["Longitude of natural origin",0,
## ANGLEUNIT["degree",0.0174532925199433],
## ID["EPSG",8802]],
## PARAMETER["False easting",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8806]],
## PARAMETER["False northing",0,
## LENGTHUNIT["metre",1],
## ID["EPSG",8807]]],
## CS[Cartesian,2],
## AXIS["easting (X)",east,
## ORDER[1],
## LENGTHUNIT["metre",1]],
## AXIS["northing (Y)",north,
## ORDER[2],
## LENGTHUNIT["metre",1]],
## USAGE[
## SCOPE["unknown"],
## AREA["World - 85°S to 85°N"],
## BBOX[-85.06,-180,85.06,180]],
## ID["EPSG",3857]]
# Plot them together!
ggplot() +
geom_sf(data = ca_subset) +
geom_sf(data = sesbania, size = 1, color = "red")
# Find the count of red sesbania observed locations in this dataset by county
# Spatial joins
ca_sesbania <- ca_subset %>%
st_join(sesbania)
sesbania_counts <- ca_sesbania %>%
count(county_name)
# Chloropleth using the number of records for red sesbania as the fill color
ggplot(data = sesbania_counts) +
geom_sf(aes(fill = n), color = "white", size = 0.1) +
scale_fill_gradientn(colors = c("lightgray", "orange", "red")) +
theme_minimal() +
labs(fill = "Number of S. punicea records")
# Suset of sesbania point locations only in Solano County
solano_sesbania <- sesbania %>%
filter(COUNTY == "Solano")
# Onlykeep Solano polygon from California County data
solano <- ca_subset %>%
filter(county_name == "Solano")
ggplot()+
geom_sf(data = solano) +
geom_sf(data = solano_sesbania)
C. Making an interactive map with {tmap}
# Set the viewing mode to "interactive":
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Then make a map (with the polygon fill color updated by variable 'land_area', updating the color palette to "BuGn"), then add another shape layer for the sesbania records (added as dots):
tm_shape(ca_subset) +
tm_fill("land_area", palette = "BuGn") +
tm_shape(sesbania) +
tm_dots()